getwd()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Packages
set.seed(717)
library(cubature)
library(lava) 
library(ggplot2)
library(mcGlobaloptim) 
library(DiceKriging) 
library(nloptr) 
library(MASS)
library(mcmc)
library(geoR)
library(RobustCalibration)
library(lhs)
library(RobustGaSP)
library(numDeriv)
library(ggplot2)
library(rTensor)
library(plotly)
library(tensorA)
library(proxy)
library(abind)
library(scatterplot3d)
library(Matrix)

load("s1-ini-setting-p-1.RData")

## Model: f(x) = B*U*g(x); B: 3*2, U: 3*3, x:2*2
## Packages
set.seed(717)
library(cubature)
library(lava) 
library(ggplot2)
library(mcGlobaloptim) 
library(DiceKriging) 
library(nloptr) 
library(MASS)
library(mcmc)
library(geoR)
library(RobustCalibration)
library(lhs)
library(RobustGaSP)
library(numDeriv)
library(ggplot2)
library(rTensor)
library(plotly)
library(tensorA)
library(proxy)
library(abind)
library(scatterplot3d)
library(Matrix)

load("s3-ini-setting-p-1.RData")

## Model: f(x) = B*U*g(x); B: 3*2, U: 3*3, x:2*2
dim.b = c(3,3,3); dim.u1 = c(4,3); dim.u2 = c(5,3); dim.x = c(2,3)
dim.f = c(4,5,2); t1 = dim.f[1]; t2 = dim.f[2]; t3 = dim.f[3]; 
dim.h = prod(dim.f); dim.mode = length(dim.f)
d = 3; lower.x = rep(0,d); upper.x = rep(1,d)
dim.mode.ml = dim.mode+1; dim.f.ml = c(dim.f,d)
k = ceiling(dim.h/6)

B <- e3.ini.set.p$B
U_mat <- e3.ini.set.p$U_mat
V <- e3.ini.set.p$V

true.model <- function(x){
  X1 = sin(5*x); X2 = cos(x)
  X = matrix(cbind(X1,X2),dim.x)
  return(array(ttm(V, X, m = 3)@data,dim.f))
}

true.h <- function(x,ind) {
  sele.f <- true.model(x)[ind]
  return(sum(sele.f))
}

ora.h <- function(x,k) {
  sele.f <- sort(true.model(x), decreasing = TRUE)[1:k]
  return(sum(sele.f))
}

ora.to.re = directL(function(x0) -ora.h(x0,k),lower.x,upper.x,control=list(xtol_rel=1e-8, maxeval=1000))
ora.x.star = ora.to.re$par
ora.x.star = t(as.matrix(ora.x.star))

ora.ind.star = arrayInd(order(true.model(ora.x.star), decreasing = TRUE)[1:k],dim.f)
ora.f.star = true.model(ora.x.star)
ora.h.star = ora.h(ora.x.star,k)


pos <- function(ind) array(1:dim.h,dim.f)[ind] #(p[3]-1)*(t1*t2)+(p[2]-1)*t1 +p[1]

e.ind <- function(ind){
  e0 = pos(ind); ord = cbind(1:k,e0)
  e.re = matrix(0,k,dim.h)
  e.re[ord] = 1
  return(e.re)
}


################################################################################
#### GP ########################################################################
################################################################################

################################################################################
## Our proposed method: NS-TOGP
## Kernel
phi.x <- function(x) kronecker(diag(dim.h),x)
u.k <- function(t,ome) matrix(ome,t,t)

vec.lab = list()
for(om.lab in 1:dim.mode.ml){
  vec.lab[[om.lab]] = dim.f.ml[om.lab]^2
}
vec.lab[[dim.mode.ml+1]] = vec.lab[[dim.mode.ml+2]] = 1
group.lab <- unlist(Map(rep, LETTERS[1:length(vec.lab)], unlist(vec.lab)))
dim.hyper.ml = length(group.lab)

lower.th = c(unlist(Map(rep, c(rep(1e-3,dim.mode.ml),1e-2,1e-10), unlist(vec.lab))))
upper.th = c(unlist(Map(rep, c(rep(1,dim.mode.ml),10,1e-2), unlist(vec.lab))))

ker.ml <- function(the){
  the0 = split(the, group.lab)
  sig = list()
  for(i in 1:dim.mode.ml){
    sig[[i]] = u.k(dim.f.ml[i],the0[[i]])
  }
  return(sig)
}
# ker.ml(c(runif(dim.hyper.ml)))


likeli.ml <- function(x,y,ind,n,the){
  the0 = split(the, group.lab)
  ome = list()
  for(i in 1:dim.mode.ml){ome[[i]] = the0[[i]]}
  sig2 = the0[[dim.mode.ml+1]]; tau2 = the0[[dim.mode.ml+2]]
  
  e.ini = as.matrix(bdiag(lapply(ind,e.ind)))
  
  sig = ker.ml(the); U = Reduce(kronecker,sig)
  Ut = e.ini%*%phi.x(x)%*%U; sv <- svd(Ut)$d
  ld = sum(log(c(sv^2, rep(0, n*dim.h-length(sv)))+tau2))
  
  e.ini = as.matrix(bdiag(lapply(ind,e.ind)))
  
  par.k.y = t(Ut)%*%Ut + tau2*diag(ncol(Ut)); par.sol.k.y = solve(par.k.y)
  
  log.likeli = 1/tau2*t(c(y))%*%c(y)-1/tau2*t(c(y))%*%Ut%*%par.sol.k.y%*%t(Ut)%*%c(y)+ 
    ld+Reduce(sum,sapply(sig, function(x) x^2))
  
  return(list(like=log.likeli, the0=the0))
}
# likeli.ml(x,y,k.ind,n,runif(dim.hyper.ml))

EIJ <- function(i,j,l){
  E0 = matrix(0,dim.f.ml[l],dim.f.ml[l]); E0[i,j] = 1
  return(E0)
}

der.l <- function(x,y,ind,n,the){
  the0 = split(the, group.lab)
  
  ome = list()
  for(i in 1:dim.mode.ml){ome[[i]] = the0[[i]]}
  sig2 = the0[[dim.mode.ml+1]]; tau2 = the0[[dim.mode.ml+2]]
  
  e.ini = as.matrix(bdiag(lapply(ind,e.ind)))
  
  sig = list()
  for(i in 1:dim.mode.ml){
    sig[[i]] = u.k(dim.f.ml[i],the0[[i]])
  }
  
  u.brave = e.ini%*%phi.x(x)%*%Reduce(kronecker,sig)
  k.ini = u.brave%*%t(u.brave)
  k.y = sig2*k.ini+tau2*diag(n*k)
  sol.k.y = solve(k.y)
  
  al.k = sol.k.y%*%c(y)
  der.l.sig2 = tr(sol.k.y%*%k.ini)-t(al.k)%*%k.ini%*%al.k
  der.l.tau2 = tr(sol.k.y)-t(al.k)%*%al.k
  
  al.k1 = t(e.ini%*%phi.x(x))%*%al.k
  der.l.phi = list()
  for(l in 1:dim.mode.ml){
    der.l.phi[[l]] = matrix(0, dim.f.ml[l], dim.f.ml[l])
    
    der.l.phi.ij <- function(i,j) EIJ(i,j,l)%*%t(sig[[l]])+sig[[l]]%*%EIJ(j,i,l)
    it1 <- function(der.p){
      list1 = if (l > 1) sig[1:(l-1)] else 1
      list2 = if (l < dim.mode.ml) sig[(l+1):dim.mode.ml] else 1
      kro.list = list(Reduce(kronecker,list1),der.p,Reduce(kronecker,list2))
      return(Reduce(kronecker,kro.list))
    } 
    it2 <- function(der.p) tr(t(e.ini%*%phi.x(x))%*%sol.k.y%*%e.ini%*%phi.x(x)%*%it1(der.p))-
      t(al.k1)%*%it1(der.p)%*%al.k1
    
    der.l.phi.1 = sapply(c(1:dim.f.ml[l]), function(i) {
      sapply(c(1:dim.f.ml[l]), function(j) it2(der.l.phi.ij(i, j)))
    })
    
    for (i in 1:dim.f.ml[l]) {
      der.l.phi[[l]][i, 1:dim.f.ml[l]] <- der.l.phi.1[[i]]
    }
  }
  
  result = list(der.l.phi=der.l.phi, der.l.sig2=der.l.sig2, der.l.tau2=der.l.tau2)
  return(result)
}
# unlist(der.l(x,y,k.ind,n,runif(dim.hyper.ml)))


mlgp.hat <- function(x.new,ind.new,x,y,ind,n,n.test,hy){
  x.new = matrix(x.new,n.test,d)
  e.new = as.matrix(bdiag(lapply(ind.new,e.ind)))
  e.ini = as.matrix(bdiag(lapply(ind,e.ind)))
  
  sig2 = hy[[dim.mode.ml+1]]; tau2 = hy[[dim.mode.ml+2]]
  
  sig = ker.ml(unlist(hy)); U = Reduce(kronecker,sig)
  Ut = e.ini%*%phi.x(x)%*%U
  
  k.ini = Ut%*%t(Ut)
  k.y = sig2*k.ini + tau2*diag(n*k)
  
  k.test = sig2*phi.x(x.new)%*%U%*%t(Ut)
  k.test0 = sig2*phi.x(x.new)%*%U%*%t(phi.x(x.new)%*%U)
  
  k.oth = k.test%*%solve(k.y)
  
  f.hat = k.oth%*%c(y)
  var.hat = k.test0-k.oth%*%t(k.test)
  
  par.k.test = sig2*e.new%*%phi.x(x.new)%*%U%*%t(Ut)
  par.k.test0 = sig2*e.new%*%phi.x(x.new)%*%U%*%t(e.new%*%phi.x(x.new)%*%U)
  
  par.k.oth = e.new%*%k.test%*%solve(k.y)
  
  par.f.hat = par.k.oth%*%c(y)
  par.var.hat = par.k.test0-par.k.oth%*%t(par.k.test)
  
  result = list(mean = f.hat, cov = var.hat, par.mean = par.f.hat, par.cov = par.var.hat)
  return(result)
}
# mlgp.hat(ora.x.star,list(ora.ind.star),x,y,k.ind,n,1,split(runif(dim.hyper.ml), group.lab))




################################################################################
## Our proposed method: NS-mlGP-UCB
n = 5*d; m  = 10*d; lambda=0.1; J.for=10

like.re.ml = hyper.ml = lapply(1:J.for, function(x) list())
x0.ml = y0.ml = ind0.ml = list(); fhat = lapply(1:J.for, function(x) list())
mlgp.bo = h.ml = list()
mse.x.ml = mae.y.ml = list(); tab.ind.ml = list()
regret.ml = ins.regret.ml = cum.regret.ml = list()
beta.ml = ucb.new.ml = alpha.ml = lapply(1:J.for, function(x) list())


for(j.for.ml in 1:J.for){
  x = e3.ini.set.p$x 
  k.ind = e3.ini.set.p$k.ind
  y = t(e3.ini.set.p$y.for[[j.for.ml]])
  
  
  ## Setting
  ######################################## BO ####################################
  hyper.ml.old = directL(function(the) likeli.ml(x,y,k.ind,n,the)$like,lower.th,upper.th,control=list(maxeval=1000))$par
  hyper.ml.new = optim(par = hyper.ml.old, 
                       fn = function(the) likeli.ml(x,y,k.ind,n,the)$like,
                       gr = function(the) unlist(der.l(x,y,k.ind,n,the)), 
                       method = "L-BFGS-B",lower = lower.th, upper = upper.th)$par
  like.re.ml[[j.for.ml]][[1]] = likeli.ml(x,y,k.ind,n,hyper.ml.new)
  hyper.ml[[j.for.ml]][[1]] = like.re.ml[[j.for.ml]][[1]]$the0

  x0.ml[[j.for.ml]] = x; y0.ml[[j.for.ml]] = y; n.ml = n
  ind0.ml[[j.for.ml]] = k.ind
  
  x.new.ml = t(as.matrix(x[which.max(apply(y,2,sum)),]))
  y.new.ml = y[which.max(apply(y,2,sum)),]
  ind.new.ml = list(k.ind[[which.max(apply(y,2,sum))]])
  
  hyper.ml.ucb = unlist(hyper.ml[[j.for.ml]][[1]]); delta.ml = 0.05
  fhat[[j.for.ml]][[1]] = mlgp.hat(x.new.ml,ind.new.ml,x,y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,1,hyper.ml[[j.for.ml]][[1]])
  
  for(i.ml in 1:m){
    sig2 = hyper.ml[[j.for.ml]][[i.ml]][[dim.mode.ml+2]]
    eta = hyper.ml[[j.for.ml]][[i.ml]][[dim.mode.ml+2]]
    it1 = Map(function(A) determinant(diag(dim.h)+1/eta*A[[2]],logarithm=TRUE)$modulus,fhat[[j.for.ml]])
    beta.ml[[j.for.ml]][[i.ml]] = sqrt(Matrix::norm(fhat[[j.for.ml]][[1]]$mean,type="2"))+
      sqrt(sig2/eta)*sqrt(2*log(1/delta.ml)+Reduce(sum,it1))
    
    alpha.ml[[j.for.ml]][[i.ml]] = 2*log(dim.h*i.ml^2*m*6/(pi^2*delta.ml))
    
    ucb.ml <- function(x.new,beta){
      x.new = matrix(x.new,1,d)
      mlgp.output = mlgp.hat(x.new,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml]])
      ucb = sum(mlgp.output$mean)+sqrt(beta)*sqrt(Matrix::norm(mlgp.output$cov,type="2"))
      return(ucb)
    }
    x.new.ml = bobyqa(x.new.ml, function(x.new) -ucb.ml(x.new,beta.ml[[j.for.ml]][[i.ml]]),
                      lower=lower.x,upper=upper.x)$par
    
    ml.output = mlgp.hat(x.new.ml,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml]])
    ucb.new.ml[[j.for.ml]][[i.ml]] = array(ml.output$mean + sqrt(alpha.ml[[j.for.ml]][[i.ml]])*sqrt(diag(ml.output$cov)), dim.f)
    ind.new.ml = list(arrayInd(order(ucb.new.ml[[j.for.ml]][[i.ml]], decreasing = TRUE)[1:k],dim.f))
    
    ucb.new.ml[[j.for.ml]][[i.ml]] = ucb.ml(x.new.ml,beta.ml[[j.for.ml]][[i.ml]])
    # x.new.ml = t(t(randomLHS(1,d))*(upper.x-lower.x) + lower.x)
    # ind.new.ml = list(arrayInd(sample(dim.h,k),dim.f))
    
    fhat[[j.for.ml]][[i.ml+1]] = mlgp.hat(x.new.ml,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml]])
    y.new.ml = true.model(x.new.ml)[ind.new.ml[[1]]]+rnorm(k,mean=0,sd=lambda)
    
    x0.ml[[j.for.ml]] = rbind(x0.ml[[j.for.ml]], x.new.ml)
    y0.ml[[j.for.ml]] = rbind(y0.ml[[j.for.ml]], y.new.ml)
    ind0.ml[[j.for.ml]] = append(ind0.ml[[j.for.ml]], ind.new.ml)
    
    n.ml = n+i.ml
    
    if(i.ml %% 5 == 0){
      hyper.ml.ucb = optim(par = unlist(hyper.ml[[j.for.ml]][[i.ml]]),
                           fn = function(the) likeli.ml(x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,the)$like,
                           gr = function(the) unlist(der.l(x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,the)), 
                           method = "L-BFGS-B",lower = lower.th, upper = upper.th)$par
    }else{
      hyper.ml.ucb = hyper.ml.ucb
    }
    
    like.re.ml[[j.for.ml]][[i.ml+1]] = likeli.ml(x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,hyper.ml.ucb)
    hyper.ml[[j.for.ml]][[i.ml+1]] = like.re.ml[[j.for.ml]][[i.ml+1]]$the0
    print(i.ml)
  }
  
  mlgp.bo[[j.for.ml]] = Map(function(a,b) true.model(a)[b],split(x0.ml[[j.for.ml]],row(x0.ml[[j.for.ml]])),ind0.ml[[j.for.ml]])
  h.ml[[j.for.ml]] = Map(function(a,b) true.h(a,b),split(x0.ml[[j.for.ml]],row(x0.ml[[j.for.ml]])),ind0.ml[[j.for.ml]])
  
  mse.x.ml[[j.for.ml]] = apply(x0.ml[[j.for.ml]],1,function(x) mean((x-ora.x.star)^2))
  mae.y.ml[[j.for.ml]] = unlist(lapply(fhat[[j.for.ml]],function(a) mean(abs((a$par.mean-c(ora.f.star[ora.ind.star]))/c(ora.f.star[ora.ind.star])))))
  tab.ind.ml[[j.for.ml]] = unlist(lapply(ind0.ml[[j.for.ml]],function(a) sum(apply(a, 1, function(row) any(duplicated(rbind(row, ora.ind.star)))))))#unlist(lapply(ind0.ml[[j.for.ml]],function(a) sum(apply(a == ora.ind.star, 1, all))))
  
  regret.ml[[j.for.ml]] = ora.h.star-unlist(h.ml[[j.for.ml]])
  ins.regret.ml[[j.for.ml]] = ora.h.star-cummax(unlist(h.ml[[j.for.ml]]))
  cum.regret.ml[[j.for.ml]] = cumsum(ins.regret.ml[[j.for.ml]])
  
  layout(matrix(1, nrow = 1, ncol = 1))
  plot(rep(ora.h.star,(n.ml-n+1)),type="b",lwd=3,lty=1,pch=1,col=1,ylim=c(cummax(unlist(h.ml[[j.for.ml]]))[n],ora.h.star))
  lines(cummax(unlist(h.ml[[j.for.ml]]))[n:n.ml],type="b",lwd=3,lty=2,pch=2,col=2)
  
  plot(log(cummin(regret.ml[[j.for.ml]])),type="b",lwd=3,lty=1,pch=1,col=1)
  
  print(j.for.ml)
}


pml.ucb.list = list(like.re.ml=like.re.ml, hyper.ml=hyper.ml, 
                    ind0.ml=ind0.ml, tab.ind.ml=tab.ind.ml,
                    x0.ml=x0.ml, y0.ml=y0.ml, mlgp.bo=mlgp.bo, h.ml=h.ml, 
                    mse.x.ml=mse.x.ml, mae.y.ml=mae.y.ml, 
                    regret.ml=regret.ml, ins.regret.ml=ins.regret.ml, cum.regret.ml=cum.regret.ml,
                    beta.ml=beta.ml, alpha.ml=alpha.ml, ucb.new.ml=ucb.new.ml)
save(pml.ucb.list, file="s3.pml.ucb.RData")


